Source codes: https://github.com/Anne-LiseGerard/dftd_rnaseq Data: https://doi.org/10.1007/s00018-019-03259-2

suppressPackageStartupMessages({
    library("reshape2")
    library("gplots")
    library("DESeq2")
    library("mitch")
    library("limma")
    library("kableExtra")
    library("dplyr")
    library("tidyr")
    library("ComplexHeatmap")
    library("RColorBrewer")
    library("circlize")
    library("pathview")
    library("stringr")
    library("readxl")
    library("grid")
    library("ggplot2")
    library("viridis")
    library("enrichplot")
})

knitr::opts_chunk$set(dev = 'svg')

Background

Goal: re-analyse previously published DFTD tumour biopsy transcriptomes and compare to DFTD cell line transcriptomes, focusing on genes involved in metabolism. Below is a list of the samples for this project.

ss <- read.table("../ss_patchett.txt",sep="\t",fill=TRUE,header=TRUE)
ss$DFT <- as.factor(ss$DFT)

ss %>%
  kbl(caption="Sample sheet for all samples") %>%
  kable_paper("hover", full_width = F)
Sample sheet for all samples
run_accession sample_id DFT sample sample_type
ERR2804403 sha_DFT1_1-1 DFT1 DFT1_1 biopsy
ERR2804404 sha_DFT1_1-2 DFT1 DFT1_1 biopsy
ERR2804405 sha_DFT1_2-1 DFT1 DFT1_2 biopsy
ERR2804406 sha_DFT1_2-2 DFT1 DFT1_2 biopsy
ERR2804407 sha_DFT2_1-1 DFT2 DFT2_1 biopsy
ERR2804408 sha_DFT2_1-2 DFT2 DFT2_1 biopsy
ERR2804409 sha_DFT2_2-1 DFT2 DFT2_2 biopsy
ERR2804410 sha_DFT2_2-2 DFT2 DFT2_2 biopsy
ERR2852729 sha_DFT2_RV_2-2 DFT2 RV_2 cell_line
ERR2852728 sha_DFT2_RV_2-1 DFT2 RV_2 cell_line
ERR2852727 sha_DFT2_RV_1-2 DFT2 RV_1 cell_line
ERR2852726 sha_DFT2_RV_1-1 DFT2 RV_1 cell_line
SRR6266557 C5065_UT_01 DFT1 C5065 cell_line
SRR6266556 C5065_UT_02 DFT1 C5065 cell_line

Functions

# volcanoplots
make_volcano <- function(de,name) {
    sig <- subset(de,padj<0.05)
    N_SIG=nrow(sig)
    N_UP=nrow(subset(sig,log2FoldChange>0))
    N_DN=nrow(subset(sig,log2FoldChange<0))
    DET=nrow(de)
    HEADER=paste(N_SIG,"@5%FDR,", N_UP, "up", N_DN, "dn", DET, "detected")
    plot(de$log2FoldChange,-log10(de$padj),cex=1,pch=19,col="#D5D7E2",
        main=name, xlab="log2 FC", ylab="-log10 pval")
    mtext(HEADER)
    grid()
    points(sig$log2FoldChange,-log10(sig$padj),cex=1,pch=19,col="#5297A7")
}

#heatmaps
custom_heatmap <- function(zscore, fc, aveexpr, title) {
  h1 <- Heatmap(zscore, cluster_rows = F,
              column_labels = colnames(zscore), name="Z-score",
              cluster_columns = T, 
              column_names_gp = gpar(fontsize = 7, fontface="bold"),
              col=colorRamp2(c(-1.5,0,1.5), hcl_palette = "Blue-Red 2"),
              heatmap_legend_param = list(title = "Z-score", at = seq(-1.5, 1.5, 0.5)))
  h2 <- Heatmap(fc, row_labels = rownames(fc), row_names_gp = gpar(fontsize = 6),
              cluster_rows = F, name="logFC", col = col_logFC,column_names_gp = gpar(fontsize =7,fontface="bold"),
              cell_fun = function(j, i, x, y, w, h, col) { # add text to each grid
                grid.text(round(as.numeric(fc[i, j],2),2), x, y,gp=gpar(fontsize=6))
              }, column_title = title, 
              heatmap_legend_param = list(title = "logFC", at = seq(-20, 20, 10)))

  h <- h1 + h2
  h
}

Load data

Import data from the aligner.

tmp <- read.table("../fastq/3col_patchett.tsv.gz")
tmp$V1 <- gsub('.fastq-trimmed.fastq','',tmp$V1)
x <- as.data.frame(acast(tmp, V2~V1, value.var="V3",fun.aggregate=sum))

tmp_old <- read.table("../fastq/3col_old.tsv.gz")
tmp_old$V1 <- gsub('.fastq-trimmed.fastq','',tmp_old$V1)
x_old <- as.data.frame(acast(tmp_old, V2~V1, value.var="V3",fun.aggregate=sum))

Load gene names.

gn <- read.table("../ref/Sarcophilus_harrisii.mSarHar1.11.cdna+ncrna.genenames.tsv",fill=TRUE)
gn <- gn[order(gn$V1),]

gn_old <- read.table("../ref/Sarcophilus_harrisii.DEVIL7.0.cdna+ncrna.genenames.tsv",fill=TRUE)
gn_old <- gn_old[order(gn_old$V1),]

Load homology table.

hm <- read.table("../ref/mart_export_ensembl109_2023-07-14.txt",sep="\t",header=TRUE)

hm_old <- read.table("../ref/mart_export_ensembl101_2023-09-06.txt",sep="\t",header=TRUE)

Now need to collapse transcript data to genes.

x$gene <- paste(gn$V2,gn$V3)

y <- aggregate(. ~ gene,x,sum)

rownames(y) <- y$gene
y$gene = NULL


x_old$gene <- paste(gn_old$V2,gn_old$V3)

y_old <- aggregate(. ~ gene,x_old,sum)

rownames(y_old) <- y_old$gene
y_old$gene = NULL

Quality control

Samples with <1M reads should be omitted. Will also round values to integers. Using the new genome yields a higher number of mapped reads (this data was originally mapped on the DEVIL 7.0 assembly in Patchett 2019).

cs <- colSums(y)
cs <- cs[order(cs)]

par(mar=c(5,10,5,2))
barplot(cs,main="All samples",horiz=TRUE,las=1)
abline(v=1e7,col="red",lty=2)

y <- round(y)


cs_old <- colSums(y_old)
cs_old <- cs_old[order(cs_old)]

par(mar=c(5,10,5,2))
barplot(cs_old,main="All samples - old genome ",horiz=TRUE,las=1, xli=c(0,4e+07))
abline(v=1e7,col="red",lty=2)

y_old <- round(y_old)

MDS

This will help us to visualise the sources of variability in the overall dataset. Plot MDS. Also fix the sample names. Plot MDS by DFT. Plot MDS by cell lines and biiopsies. Decided to only include biopsies for rest of analysis.

par(mar = c(5.1, 4.1, 4.1, 2.1))

colnames(y) <- ss$sample_id

saveRDS(y, file = "y_biopsies.rds")

cs <- colSums(y)
cs <- cs[order(cs)]

par(mar=c(5,10,5,2))
barplot(cs,main="All samples",horiz=TRUE,las=1)

par(mar = c(5.1, 4.1, 4.1, 2.1))

colnames(y_old) <- ss$sample_id[1:8]

cs_old <- colSums(y_old)
cs_old <- cs_old[order(cs_old)]

par(mar=c(5,10,5,2))
barplot(cs_old,main="All samples - old genome",horiz=TRUE,las=1, xlim=c(0,4e+07))

par(mar = c(5.1, 4.1, 4.1, 2.1) )

cols <- ss$DFT
cols <- gsub("DFT1","#B3B9DF",cols)
cols <- gsub("DFT2","#DDAFB7",cols)
mymds <- plotMDS(y,plot=FALSE)

mymds_old <- plotMDS(y_old,plot=FALSE)

# fix the xlims
XMIN=min(mymds$x)
XMAX=max(mymds$x)
XMID=(XMAX+XMIN)/2
XMIN <- XMID + (XMIN-XMID)*1.1
XMAX <- XMID+(XMAX-XMID)*1.1
plotMDS(mymds,pch=17,cex=3,col=cols,main="MDS plot",xlim=c(XMIN,XMAX))
text(mymds,labels=colnames(y))
mtext("blue=DFT1, pink=DFT2")

plotMDS(mymds_old,pch=17,cex=3,col=cols,main="MDS plot - old genome",xlim=c(XMIN,XMAX))
text(mymds_old,labels=colnames(y_old))
mtext("blue=DFT1, pink=DFT2")

y_DFT1 <- y[,colnames(y) %in% c("sha_DFT1_1-1","sha_DFT1_1-2","sha_DFT1_2-1","sha_DFT1_2-2",
                                "C5065_UT_01", "C5065_UT_02")]
y_DFT2 <- y[,colnames(y) %in% c("sha_DFT2_1-1","sha_DFT2_1-2","sha_DFT2_2-1","sha_DFT2_2-2",
                                "sha_DFT2_RV_1-1","sha_DFT2_RV_1-2","sha_DFT2_RV_2-1","sha_DFT2_RV_2-2")] 

y_biopsy <- y[,colnames(y) %in% c("sha_DFT1_1-1","sha_DFT1_1-2","sha_DFT1_2-1","sha_DFT1_2-2",
                                  "sha_DFT2_1-1","sha_DFT2_1-2","sha_DFT2_2-1","sha_DFT2_2-2")]
y_cline <- y[,colnames(y) %in% c( "C5065_UT_01", "C5065_UT_02",
                                "sha_DFT2_RV_1-1","sha_DFT2_RV_1-2","sha_DFT2_RV_2-1","sha_DFT2_RV_2-2")] 

mdsDFT1 <- plotMDS(y_DFT1,plot=FALSE)
plotMDS(mdsDFT1,pch=17,cex=3,col="#B3B9DF",main="MDS plot",xlim=c(XMIN,XMAX))
text(mdsDFT1,labels=colnames(y_DFT1))
mtext("DFT1")

mdsDFT2 <- plotMDS(y_DFT2,plot=FALSE)
plotMDS(mdsDFT2,pch=17,cex=3,col="#DDAFB7",main="MDS plot",xlim=c(XMIN,XMAX))
text(mdsDFT2, labels=colnames(y_DFT2))
mtext("DFT2")

mds_biopsy <- plotMDS(y_biopsy,plot=FALSE)
plotMDS(mds_biopsy,pch=17,cex=3,col=cols,main="Tumour biopsies",xlim=c(XMIN,XMAX))
text(mds_biopsy,labels=colnames(y_biopsy))
mtext("Tumour biopsies: blue=DFT1, pink=DFT2")

mds_cline <- plotMDS(y_cline,plot=FALSE)
plotMDS(mds_cline,pch=17,cex=3,col=cols,main="MDS plot",xlim=c(XMIN,XMAX))
text(mds_cline,labels=colnames(y_cline))
mtext("Cell lines: blue=DFT1, pink=DFT2")

DESeq2

Sum replicates then run differential expression analysis. For now, ignore alignment on old genome and cell line data.

y_cell <- y[,9:14]
y <- y[,1:8]

# combine replicates for each sample
DFT1_1 <- rowSums(y[,ss$sample=="DFT1_1"])
DFT1_2 <- rowSums(y[,ss$sample=="DFT1_2"])
DFT2_1 <- rowSums(y[,ss$sample=="DFT2_1"])
DFT2_2 <- rowSums(y[,ss$sample=="DFT2_2"])

y2 <- data.frame(DFT1_1,DFT1_2,DFT2_1,DFT2_2)
ss2 <- as.data.frame(colnames(y2))
colnames(ss2) <- "sample"
ss2$DFT <- factor(c("DFT1","DFT1","DFT2","DFT2"))

y2 <- y2[which(rowMeans(y2)>10),] # remove genes with average count < 10
dim(y2)
## [1] 15931     4
dds <- DESeqDataSetFromMatrix(countData = y2 , colData = ss2, design = ~ DFT )
## converting counts to integer mode
dds2 <- dds
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds)
zz<-cbind(as.data.frame(z),assay(vsd))
dge<-as.data.frame(zz[order(zz$pvalue),])
dge2 <- dge
sig <- subset(dge,padj<0.05)
sig2 <- sig
sig2_up <- rownames(subset(sig,log2FoldChange>0))
sig2_dn <- rownames(subset(sig,log2FoldChange<0))
length(sig2_up)
## [1] 2536
length(sig2_dn)
## [1] 2033
make_volcano(dge2,"Tumour biopsies")

sig[1:50,1:6] %>%
  kbl(caption="Tumour biopsies") %>%
  kable_paper("hover", full_width = F)
Tumour biopsies
baseMean log2FoldChange lfcSE stat pvalue padj
ENSSHAG00000031627.1 PRX 19373.4283 -8.968977 0.2894364 -30.98773 0 0
ENSSHAG00000001230.2 NA 10348.7183 -6.799811 0.2540229 -26.76849 0 0
ENSSHAG00000024692.1 NA 4999.3676 -6.180271 0.2714882 -22.76442 0 0
ENSSHAG00000028855.1 NA 7373.2103 8.218378 0.3645190 22.54582 0 0
ENSSHAG00000012247.2 COL9A1 4015.0597 8.997323 0.4019274 22.38544 0 0
ENSSHAG00000022172.1 HAPLN2 6695.1868 -10.289516 0.4742283 -21.69739 0 0
ENSSHAG00000008158.2 NA 13177.6995 9.309236 0.4323846 21.52999 0 0
ENSSHAG00000021199.1 UGT8 12350.1972 -10.371276 0.4882968 -21.23970 0 0
ENSSHAG00000025070.1 NA 1894.6094 -7.072079 0.3410819 -20.73425 0 0
ENSSHAG00000011362.2 NA 2280.0991 7.982211 0.3989326 20.00892 0 0
ENSSHAG00000030052.1 CFTR 3494.4469 -9.939796 0.5041830 -19.71466 0 0
ENSSHAG00000028043.1 NA 9433.1352 -7.017523 0.3623763 -19.36530 0 0
ENSSHAG00000000720.2 ANO1 4017.4307 -4.669392 0.2435018 -19.17601 0 0
ENSSHAG00000010203.2 NA 3321.1501 -5.790986 0.3031099 -19.10523 0 0
ENSSHAG00000013813.2 NA 1817.3236 8.295411 0.4353132 19.05619 0 0
ENSSHAG00000028708.1 NA 3398.8519 6.491294 0.3425299 18.95103 0 0
ENSSHAG00000024629.1 NA 1343.7559 6.383444 0.3399438 18.77794 0 0
ENSSHAG00000016586.2 NA 1835.1747 -6.920324 0.3884226 -17.81648 0 0
ENSSHAG00000006184.2 ATCAY 1670.3603 9.361726 0.5279151 17.73339 0 0
ENSSHAG00000024135.1 STK32B 2034.3346 9.519390 0.5470800 17.40036 0 0
ENSSHAG00000028637.1 NA 4152.7889 -5.630734 0.3245807 -17.34772 0 0
ENSSHAG00000014038.2 NA 12218.7615 5.503863 0.3213211 17.12886 0 0
ENSSHAG00000014560.2 PLLP 2929.6765 -7.645683 0.4475932 -17.08177 0 0
ENSSHAG00000028146.1 NSG1 2831.2843 -6.946736 0.4078914 -17.03085 0 0
ENSSHAG00000021888.1 PRSS35 1185.1224 5.763210 0.3446391 16.72245 0 0
ENSSHAG00000004800.2 ALKAL2 1461.3473 9.459052 0.5799044 16.31140 0 0
ENSSHAG00000028187.1 NA 1148.8575 -5.776665 0.3563146 -16.21226 0 0
ENSSHAG00000012895.2 NA 901.0142 -6.245494 0.3879777 -16.09756 0 0
ENSSHAG00000006714.2 P3H2 2310.5733 6.110694 0.3858657 15.83632 0 0
ENSSHAG00000011465.2 NA 2880.8002 -6.766093 0.4335532 -15.60614 0 0
ENSSHAG00000027357.1 DHH 1255.9489 -5.030988 0.3229230 -15.57953 0 0
ENSSHAG00000013967.2 ALDOC 2233.7205 -5.021646 0.3241752 -15.49053 0 0
ENSSHAG00000016224.2 NECTIN4 882.6687 -4.862932 0.3156142 -15.40783 0 0
ENSSHAG00000029081.1 FGF10 632.5331 7.164789 0.4652379 15.40027 0 0
ENSSHAG00000031559.1 FHDC1 1379.8599 5.027249 0.3269925 15.37420 0 0
ENSSHAG00000014878.2 EFS 987.1933 4.920944 0.3247012 15.15530 0 0
ENSSHAG00000020854.1 FMN2 765.5943 -7.665817 0.5113521 -14.99127 0 0
ENSSHAG00000008456.2 NOTCH3 3770.3176 4.007920 0.2717511 14.74850 0 0
ENSSHAG00000012189.2 NA 1655.7477 4.669329 0.3205060 14.56862 0 0
ENSSHAG00000023680.1 EGR3 5429.7319 -4.718157 0.3271562 -14.42173 0 0
ENSSHAG00000006005.2 NA 940.6342 7.989749 0.5545270 14.40822 0 0
ENSSHAG00000017676.2 ARHGAP22 727.6463 5.957871 0.4175935 14.26715 0 0
ENSSHAG00000010663.2 MAP1B 5140.0041 4.022572 0.2836538 14.18127 0 0
ENSSHAG00000007917.2 EDNRB 13288.1593 4.328769 0.3052459 14.18125 0 0
ENSSHAG00000021589.1 EBF2 996.6855 -6.138741 0.4339999 -14.14457 0 0
ENSSHAG00000024394.1 NA 1939.7779 4.717647 0.3350385 14.08091 0 0
ENSSHAG00000016955.2 CCDC68 3306.2354 -7.239744 0.5152135 -14.05193 0 0
ENSSHAG00000030008.1 NA 533.2343 5.281241 0.3765384 14.02577 0 0
ENSSHAG00000010841.2 PCDH7 506.1298 5.613175 0.4019355 13.96536 0 0
ENSSHAG00000009869.2 PXDC1 1400.8423 3.930310 0.2815769 13.95821 0 0
write.table(dge2,file="dge3.tsv",sep="\t")
# top 20 up & top 20 down
sig <- sig[order(sig$log2FoldChange, decreasing=TRUE),]

sig_noNA <- sig
sig_noNA$genes <- rownames(sig_noNA)
sig_noNA <- filter(sig_noNA, !grepl(' NA', genes))

top <- rbind(head(sig_noNA,20),tail(sig_noNA,20))

mx <- top[,7:ncol(top)] # get normalised counts

mx.scaled <- t(apply(mx[,1:4], 1, scale)) # center and scale each column (Z-score)
colnames(mx.scaled) <- colnames(mx[,1:4])
colnames(mx.scaled) <- c("DFT1 1", "DFT1 2", "DFT2 1", "DFT2 2")
log2FC <- as.matrix(top$log2FoldChange)
rownames(log2FC) <- rownames(top)
colnames(log2FC) <- "logFC"
mean <- as.matrix(top$baseMean)
rownames(mean) <- rownames(top)
colnames(mean) <- "AveExpr"


col_logFC <- colorRamp2(c(-20,0,20), hcl_palette = "Blue-Red 2")
custom_heatmap <- function(zscore, fc, aveexpr, title) {
  h1 <- Heatmap(zscore, cluster_rows = F,
              column_labels = colnames(zscore), name="Z-score",
              cluster_columns = T, 
              column_names_gp = gpar(fontsize = 7, fontface="bold"),
              col=colorRamp2(c(-1.5,0,1.5), hcl_palette = "Blue-Red 2"),
              heatmap_legend_param = list(title = "Z-score", at = seq(-1.5, 1.5, 0.5)))
  h2 <- Heatmap(fc, row_labels = rownames(fc), row_names_gp = gpar(fontsize = 6),
              cluster_rows = F, name="logFC", col = col_logFC,column_names_gp = gpar(fontsize =7,fontface="bold"),
              cell_fun = function(j, i, x, y, w, h, col) { # add text to each grid
                grid.text(round(as.numeric(fc[i, j],2),2), x, y,gp=gpar(fontsize=6))
              }, column_title = title, 
              heatmap_legend_param = list(title = "logFC", at = seq(-20, 20, 10)))

  h <- h1 + h2 
  h
}

ha <- HeatmapAnnotation(summary=anno_summary(gp=gpar(fill="#D5D7E2"),height=unit(2, "cm")))

rownames(mx.scaled) <- gsub("^.{0,21}", "", rownames(mx.scaled))
rownames(log2FC) <- gsub("^.{0,21}", "", rownames(log2FC))


top40 <- custom_heatmap(mx.scaled, log2FC, mean, title = "Tumour biopsies")

top40

saveRDS(top40, "top40_biopsies.RDS")

Enrichment analysis

Need to get human homologs of these genes. These were obtained from Ensembl v109 biomart.

rownames(dge2) <- sapply(strsplit(rownames(dge2),"\\."),"[[",1)

hm2 <- hm[hm$Tasmanian.devil.gene.stable.ID != "",]

gt <- hm2[,2:3]
length(unique(gt$Tasmanian.devil.gene.stable.ID))
## [1] 16979
length(unique(gt$Gene.name))
## [1] 16646
for(i in 1:length(gt$Gene.name)){
  if(gt$Gene.name[i]==""){gt$Gene.name[i] <- gt$Tasmanian.devil.gene.stable.ID[i]}
}

saveRDS(gt, "~/dftd_RNAseq_annelise/dge/gt.rds")

Now run mitch for DGE2. The pathways are sourced from Reactome 7th July 2023. Plot significant pathways, top 20 and metabolism related pathways.

genesets <- gmt_import("../ref/ReactomePathways_2023-07-14.gmt")

m2 <- mitch_import(dge2, DEtype="deseq2", geneTable=gt)
## The input is a single dataframe; one contrast only. Converting
##         it to a list for you.
## Note: Mean no. genes in input = 15931
## Note: no. genes in output = 12859
## Note: estimated proportion of input genes in output = 0.807
head(m2)
##                 x
## A2M    -0.3375456
## A4GALT  4.2591767
## AAAS    1.7285141
## AACS    0.7228111
## AADAC   1.5043061
## AADAT  -0.3392250
res2 <- mitch_calc(m2, genesets, priority="effect",cores=16)
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
if ( !file.exists("mitch_biopsies_reactome.html") ) {
  mitch_report(res2, "mitch_biopsies_reactome.html")
}

top <- subset(res2$enrichment_result,p.adjustANOVA<0.05)
saveRDS(top, "~/dftd_RNAseq_annelise/dge/gsea_biopsies.rds")

top %>%
  kbl(caption="Enriched pathways") %>%
  kable_paper("hover", full_width = F)
Enriched pathways
set setSize pANOVA s.dist p.adjustANOVA
1335 Translocation of ZAP-70 to Immunological synapse 11 0.0000044 0.7992684 0.0000792
842 Phosphorylation of CD3 and TCR zeta chains 14 0.0000003 0.7931880 0.0000060
802 PD-1 signaling 15 0.0000003 0.7636614 0.0000065
549 Initial triggering of complement 44 0.0000000 0.7608112 0.0000000
222 Creation of C4 and C2 activators 38 0.0000000 0.7577515 0.0000000
121 CD22 mediated BCR regulation 25 0.0000000 0.7538538 0.0000000
195 Classical antibody-mediated complement activation 35 0.0000000 0.7537497 0.0000000
1100 Scavenging of heme from plasma 35 0.0000000 0.7277048 0.0000000
206 Complement cascade 60 0.0000000 0.7181902 0.0000000
533 Immunoregulatory interactions between a Lymphoid and a non-Lymphoid cell 97 0.0000000 0.7163551 0.0000000
988 Regulation of Complement cascade 56 0.0000000 0.7094194 0.0000000
371 FCGR activation 41 0.0000000 0.7089021 0.0000000
577 Interleukin-2 signaling 10 0.0001560 0.6905284 0.0020862
180 Chemokine receptors bind chemokines 37 0.0000000 0.6890395 0.0000000
1099 Scavenging by Class A Receptors 17 0.0000020 0.6662514 0.0000383
459 Generation of second messenger molecules 24 0.0000000 0.6532106 0.0000008
110 Binding and Uptake of Ligands by Scavenger Receptors 63 0.0000000 0.6435109 0.0000000
60 Advanced glycosylation endproduct receptor signaling 13 0.0001046 0.6214386 0.0014981
154 Caspase activation via Death Receptors in the presence of ligand 13 0.0001210 0.6157259 0.0016347
68 Anchoring fibril formation 11 0.0006052 0.5971357 0.0072117
571 Interleukin-10 signaling 28 0.0000001 0.5929891 0.0000013
368 FCERI mediated Ca+2 mobilization 57 0.0000000 0.5893638 0.0000000
705 MyD88 deficiency (TLR2/4) 13 0.0002920 0.5802285 0.0037307
848 Platelet Adhesion to exposed collagen 11 0.0009145 0.5773520 0.0098235
225 Crosslinking of collagen fibrils 15 0.0001097 0.5768400 0.0015252
1059 Role of phospholipids in phagocytosis 53 0.0000000 0.5753833 0.0000000
39 Activation of Matrix Metalloproteinases 18 0.0000244 0.5745529 0.0003722
1010 Regulation of TLR by endogenous ligand 13 0.0003362 0.5743721 0.0041818
795 Other semaphorin interactions 17 0.0000550 0.5650210 0.0008125
72 Antigen activates B Cell Receptor (BCR) leading to generation of second messengers 50 0.0000000 0.5615552 0.0000000
75 Antimicrobial peptides 20 0.0000159 0.5573954 0.0002568
526 IRAK4 deficiency (TLR2/4) 14 0.0003191 0.5555970 0.0040043
442 GPVI-mediated activation cascade 30 0.0000002 0.5501338 0.0000041
674 Metallothioneins bind metals 10 0.0028306 0.5452564 0.0259787
567 Interferon gamma signaling 61 0.0000000 0.5414114 0.0000000
1217 Striated Muscle Contraction 32 0.0000001 0.5398184 0.0000029
351 Endosomal/Vacuolar pathway 10 0.0033291 0.5361351 0.0295038
1058 Role of LAT2/NTAL/LAB on calcium mobilization 42 0.0000000 0.5332483 0.0000001
1052 Response to metal ions 13 0.0009490 0.5294674 0.0100422
112 Biosynthesis of specialized proresolving mediators (SPMs) 11 0.0025923 0.5245670 0.0241832
203 Collagen degradation 46 0.0000000 0.5226893 0.0000000
202 Collagen chain trimerization 30 0.0000011 0.5146725 0.0000223
372 FCGR3A-mediated IL10 synthesis 62 0.0000000 0.5122307 0.0000000
164 Cell recruitment (pro-inflammatory response) 24 0.0000229 0.4993897 0.0003529
891 Purinergic signaling in leishmaniasis infection 24 0.0000229 0.4993897 0.0003529
566 Interferon alpha/beta signaling 50 0.0000000 0.4973144 0.0000000
369 FCERI mediated MAPK activation 57 0.0000000 0.4945321 0.0000000
239 DAP12 interactions 29 0.0000045 0.4922407 0.0000795
1121 Signal regulatory protein family interactions 11 0.0059834 0.4786666 0.0466178
281 Degradation of cysteine and homocysteine 11 0.0060363 -0.4781643 0.0466957
909 RHO GTPases Activate NADPH Oxidases 20 0.0002313 0.4756523 0.0029815
1416 tRNA processing in the mitochondrion 17 0.0007667 0.4713853 0.0084276
509 Heme degradation 12 0.0049105 0.4690005 0.0395627
328 ECM proteoglycans 60 0.0000000 0.4671459 0.0000000
1279 The NLRP3 inflammasome 14 0.0028397 0.4607463 0.0259787
201 Collagen biosynthesis and modifying enzymes 51 0.0000000 0.4534237 0.0000006
87 Assembly of collagen fibrils and other multimeric structures 48 0.0000001 0.4500397 0.0000016
559 Integrin cell surface interactions 69 0.0000000 0.4465287 0.0000000
165 Cell surface interactions at the vascular wall 141 0.0000000 0.4299920 0.0000000
330 EGR2 and SOX10-mediated initiation of Schwann cell myelination 28 0.0001084 -0.4227429 0.0015215
1188 Signaling by RAF1 mutants 36 0.0000129 0.4201478 0.0002107
70 Anti-inflammatory response favouring Leishmania parasite infection 87 0.0000000 0.4198318 0.0000000
616 Leishmania parasite growth and survival 87 0.0000000 0.4198318 0.0000000
1257 TNFs bind their physiological receptors 18 0.0022184 0.4165780 0.0212542
373 FCGR3A-mediated phagocytosis 85 0.0000000 0.4150609 0.0000000
617 Leishmania phagocytosis 85 0.0000000 0.4150609 0.0000000
828 Parasite infection 85 0.0000000 0.4150609 0.0000000
827 Paradoxical activation of RAF signaling by kinase inactive BRAF 37 0.0000126 0.4149456 0.0002081
1189 Signaling by RAS mutants 37 0.0000126 0.4149456 0.0002081
1204 Signaling by moderate kinase activity BRAF mutants 37 0.0000126 0.4149456 0.0002081
1207 Signaling downstream of RAS mutants 37 0.0000126 0.4149456 0.0002081
204 Collagen formation 70 0.0000000 0.4102901 0.0000001
1252 TNF receptor superfamily (TNFSF) members mediating non-canonical NF-kB pathway 15 0.0060592 0.4093533 0.0466957
194 Class I peroxisomal membrane protein import 17 0.0035002 -0.4091263 0.0306371
998 Regulation of KIT signaling 16 0.0053356 0.4023593 0.0425050
576 Interleukin-2 family signaling 35 0.0000393 0.4016754 0.0005873
1019 Regulation of actin dynamics for phagocytic cup formation 86 0.0000000 0.3990895 0.0000000
1210 Smooth Muscle Contraction 32 0.0001141 0.3942173 0.0015709
1203 Signaling by high-kinase activity BRAF mutants 31 0.0001820 0.3885050 0.0023899
1122 Signal transduction by L1 21 0.0021111 0.3875882 0.0203643
240 DAP12 signaling 23 0.0012989 0.3874937 0.0132508
300 Diseases associated with the TLR signaling cascade 26 0.0007412 0.3823526 0.0082116
303 Diseases of Immune System 26 0.0007412 0.3823526 0.0082116
852 Platelet degranulation 95 0.0000000 0.3822717 0.0000000
392 Fcgamma receptor (FCGR) dependent phagocytosis 110 0.0000000 0.3730596 0.0000000
555 Insulin processing 21 0.0031419 -0.3723841 0.0281972
345 Effects of PIP2 hydrolysis 24 0.0017611 0.3689261 0.0175234
1051 Response to elevated platelet cytosolic Ca2+ 100 0.0000000 0.3664213 0.0000000
781 Nucleotide catabolism 27 0.0010380 0.3647883 0.0108229
282 Degradation of the extracellular matrix 100 0.0000000 0.3640685 0.0000000
568 Interleukin receptor SHC signaling 22 0.0032708 0.3623050 0.0291699
637 MET activates PTK2 signaling 27 0.0011344 0.3619943 0.0116564
714 NCAM1 interactions 32 0.0004665 0.3575368 0.0056544
1319 Transcriptional regulation of granulopoiesis 32 0.0005804 0.3515485 0.0069743
628 MAP2K and MAPK activation 35 0.0003747 0.3475804 0.0045799
289 Detoxification of Reactive Oxygen Species 30 0.0010237 0.3465222 0.0107527
156 Caspase activation via extrinsic apoptotic signalling pathway 23 0.0042019 0.3449029 0.0354660
1285 The role of Nef in HIV-1 replication and disease pathogenesis 22 0.0064129 0.3358049 0.0488899
73 Antigen processing-Cross presentation 92 0.0000000 0.3352929 0.0000007
931 RIPK1-mediated regulated necrosis 24 0.0047287 0.3332165 0.0385359
1032 Regulation of necroptotic cell death 24 0.0047287 0.3332165 0.0385359
850 Platelet activation, signaling and aggregation 204 0.0000000 0.3317800 0.0000000
366 Extracellular matrix organization 228 0.0000000 0.3310117 0.0000000
615 Leishmania infection 164 0.0000000 0.3299292 0.0000000
829 Parasitic Infection Pathways 164 0.0000000 0.3299292 0.0000000
590 Intraflagellar transport 45 0.0001725 -0.3237882 0.0022866
982 Regulated Necrosis 43 0.0003516 0.3151493 0.0043357
71 Antigen Presentation: Folding, assembly and peptide loading of class I MHC 26 0.0055568 0.3142649 0.0440196
997 Regulation of Insulin-like Growth Factor (IGF) transport and uptake by Insulin-like Growth Factor Binding Proteins (IGFBPs) 80 0.0000013 0.3129235 0.0000271
1206 Signaling by the B Cell Receptor (BCR) 120 0.0000000 0.3098974 0.0000001
849 Platelet Aggregation (Plug Formation) 31 0.0029356 0.3087802 0.0266055
338 ER-Phagosome pathway 79 0.0000030 0.3041788 0.0000562
583 Interleukin-4 and Interleukin-13 signaling 90 0.0000007 0.3029385 0.0000147
370 FCERI mediated NF-kB activation 99 0.0000002 0.3017328 0.0000049
221 Costimulation by the CD28 family 58 0.0000717 0.3016346 0.0010477
1108 Semaphorin interactions 57 0.0001156 0.2954226 0.0015759
835 Peptide ligand-binding receptors 94 0.0000014 0.2879366 0.0000288
82 Arachidonic acid metabolism 35 0.0034208 0.2860307 0.0301285
511 Hemostasis 500 0.0000000 0.2822751 0.0000000
1183 Signaling by PDGF 50 0.0006336 0.2794879 0.0073166
191 Class A/1 (Rhodopsin-like receptors) 159 0.0000000 0.2775279 0.0000000
565 Interferon Signaling 139 0.0000000 0.2756363 0.0000006
580 Interleukin-3, Interleukin-5 and GM-CSF signaling 39 0.0029457 0.2752830 0.0266055
1247 TCR signaling 99 0.0000040 0.2686227 0.0000722
152 Cargo trafficking to the periciliary membrane 43 0.0023836 -0.2678846 0.0223835
863 Post-translational protein phosphorylation 71 0.0001017 0.2669867 0.0014713
391 Fc epsilon receptor (FCERI) signaling 145 0.0000000 0.2667312 0.0000008
866 Potential therapeutics for SARS 107 0.0000039 0.2587280 0.0000715
552 Innate Immune System 789 0.0000000 0.2524967 0.0000000
308 Diseases of programmed cell death 41 0.0053047 0.2517668 0.0424975
837 Peroxisomal protein import 53 0.0018131 -0.2478614 0.0177309
207 Complex I biogenesis 48 0.0036222 0.2428805 0.0314883
415 G alpha (i) signalling events 184 0.0000000 0.2418236 0.0000004
211 Constitutive Signaling by Aberrant PI3K in Cancer 58 0.0017672 0.2375569 0.0175234
713 NCAM signaling for neurite out-growth 50 0.0037640 0.2370084 0.0323477
323 Drug ADME 51 0.0038561 0.2340679 0.0327421
757 Neutrophil degranulation 377 0.0000000 0.2324427 0.0000000
703 Muscle contraction 140 0.0000023 0.2318584 0.0000436
416 G alpha (q) signalling events 130 0.0000053 0.2315554 0.0000923
189 Circadian Clock 60 0.0019527 -0.2313814 0.0189653
1055 Retrograde transport at the Trans-Golgi-Network 48 0.0057139 -0.2307327 0.0450130
1293 Toll Like Receptor 4 (TLR4) Cascade 120 0.0000170 0.2277135 0.0002673
232 Cytokine Signaling in Immune system 541 0.0000000 0.2270865 0.0000000
532 Immune System 1505 0.0000000 0.2259180 0.0000000
440 GPCR ligand binding 215 0.0000000 0.2255178 0.0000004
1299 Toll-like Receptor Cascades 139 0.0000050 0.2247664 0.0000869
1163 Signaling by Interleukins 361 0.0000000 0.2155405 0.0000000
664 Metabolism of nucleotides 79 0.0011043 0.2125542 0.0114295
56 Adaptive Immune System 641 0.0000000 0.2111023 0.0000000
439 GPCR downstream signalling 370 0.0000000 0.2073875 0.0000000
708 MyD88:MAL(TIRAP) cascade initiated on plasma membrane 96 0.0006450 0.2017804 0.0073166
1291 Toll Like Receptor 2 (TLR2) Cascade 96 0.0006450 0.2017804 0.0073166
1297 Toll Like Receptor TLR1:TLR2 Cascade 96 0.0006450 0.2017804 0.0073166
1298 Toll Like Receptor TLR6:TLR2 Cascade 96 0.0006450 0.2017804 0.0073166
1158 Signaling by GPCR 413 0.0000000 0.1947923 0.0000000
1047 Respiratory electron transport, ATP synthesis by chemiosmotic coupling, and heat production by uncoupling proteins. 104 0.0007323 0.1919504 0.0082116
707 MyD88-independent TLR4 cascade 94 0.0014166 0.1907151 0.0142464
1270 TRIF(TICAM1)-mediated TLR4 signaling 94 0.0014166 0.1907151 0.0142464
188 Cilium Assembly 175 0.0000163 -0.1893355 0.0002604
632 MAPK1/MAPK3 signaling 229 0.0000012 0.1866800 0.0000252
316 Downstream TCR signaling 79 0.0044756 0.1852014 0.0371134
1256 TNFR2 non-canonical NF-kB pathway 81 0.0042376 0.1840325 0.0355557
903 RAF/MAP kinase cascade 223 0.0000026 0.1832912 0.0000489
1046 Respiratory electron transport 85 0.0036418 0.1826753 0.0314883
417 G alpha (s) signalling events 84 0.0047964 0.1782415 0.0388641
813 PI5P, PP2A and IER3 Regulate PI3K/AKT Signaling 81 0.0057480 0.1777225 0.0450312
1292 Toll Like Receptor 3 (TLR3) Cascade 91 0.0038457 0.1755548 0.0327421
630 MAPK family signaling cascades 262 0.0000015 0.1734749 0.0000289
305 Diseases of glycosylation 106 0.0045775 0.1596487 0.0377377
569 Interleukin-1 family signaling 124 0.0023057 0.1587719 0.0217969
643 Macroautophagy 115 0.0043005 -0.1544031 0.0358715
96 Autophagy 127 0.0027262 -0.1542947 0.0252662
694 Mitotic G1 phase and G1/S transition 133 0.0022749 0.1535634 0.0216497
792 Organelle biogenesis and maintenance 259 0.0002006 -0.1347337 0.0026094
906 RHO GTPase Effectors 229 0.0008667 0.1282010 0.0094533
1314 Transcriptional regulation by RUNX1 156 0.0061837 0.1273271 0.0473970
1061 SARS-CoV Infections 341 0.0001075 0.1227073 0.0015215
541 Infectious disease 760 0.0000000 0.1182588 0.0000010
295 Disease 1372 0.0000000 0.1021056 0.0000000
1370 Viral Infection Pathways 591 0.0000357 0.1004779 0.0005391
97 Axon guidance 449 0.0003132 0.0999467 0.0039651
1191 Signaling by Receptor Tyrosine Kinases 423 0.0006286 0.0975826 0.0073166
1119 Signal Transduction 1942 0.0000000 0.0951019 0.0000000
1193 Signaling by Rho GTPases 566 0.0008869 0.0824966 0.0095998
1194 Signaling by Rho GTPases, Miro GTPases and RHOBTB3 581 0.0018092 0.0764697 0.0177309
291 Developmental Biology 743 0.0009237 0.0722788 0.0098487
ggplot(top, aes(x = s.dist, y = reorder(set, s.dist), color = p.adjustANOVA, size = setSize)) + 
  geom_point(stat = 'identity') + 
  scale_color_viridis(option="mako", direction=1,limits = c(0, 0.05)) +
  xlab("s dist") + ylab("pathway") + ggtitle("Reactome ALL") + 
  theme_bw()

top20 <- rbind(slice_max(top, order_by=s.dist, n=10), slice_min(top, order_by=s.dist, n=10))

ggplot(top20, aes(x = s.dist, y = reorder(set, s.dist), color = p.adjustANOVA, size = setSize)) + 
  geom_point(stat = 'identity') + 
  scale_color_viridis(option="mako", direction=1,limits = c(0, 0.05)) +
  xlab("s dist") + ylab("pathway") + ggtitle("Reactome top 20") + 
  theme_bw()

react_metabosets <- readRDS("../ref/reactome_metabo.rds")

metabo1 <- filter(top, set %in% names(react_metabosets))
saveRDS(metabo1, "~/dftd_RNAseq_annelise/dge/gsea_biopsies_metabo.rds")

ggplot(metabo1 , aes(x = s.dist, y = reorder(set, s.dist), color = p.adjustANOVA, size = setSize)) + 
  geom_point(stat = 'identity') + 
  scale_color_viridis(option="mako", direction=1,limits = c(0, 0.05)) +
  xlab("s dist") + ylab("pathway") + ggtitle("Reactome metabolism") + 
  theme_bw()

Visualisation

Heatmaps. First plot enriched pathways, then plot pathways of interest for which experimental data is available.

# see https://www.youtube.com/watch?v=ht1r34-ifVI for reference

hm3 <- data.frame(ENSSHAG=hm2[,2],geneID=hm2[,3])
dge2 <- dge
dge2$ensembl <- rownames(dge2)
split <- strsplit(dge2$ensembl, split = " ")
ensembl <- c()
gene_name <- c()
for (i in 1:length(split)){
  ensembl <- append(ensembl, split[[i]][1]) 
  gene_name <- append(gene_name, split[[i]][2]) 
}
dge2$ensembl <- ensembl
dge2$gene_name <- gene_name

dge3 <- dge2

# only plot significant DEGs
dge3sig <- filter(dge3, padj < 0.05)
dge3sig <- dge3sig[order(dge3sig$log2FoldChange, decreasing=TRUE),]

mx <- dge3sig[,7:ncol(dge3sig)] # get normalised counts
 
mx.scaled <- t(apply(mx[,1:4], 1, scale)) # center and scale each column (Z-score)
colnames(mx.scaled) <- c("DFT1 1", "DFT1 2", "DFT2 1", "DFT2 2")
log2FC <- as.matrix(dge3sig$log2FoldChange)
rownames(log2FC) <- rownames(dge3sig)
colnames(log2FC) <- "logFC"
mean <- as.matrix(dge3sig$baseMean)
rownames(mean) <- rownames(dge3sig)
colnames(mean) <- "AveExpr"

col_logFC <- colorRamp2(c(-20,0,20), hcl_palette = "Blue-Red 2")
#col_AveExpr <- colorRamp2(c(quantile(mean)[1], quantile(mean)[4]), c("white","red"))

ha <- HeatmapAnnotation(summary=anno_summary(gp=gpar(fill="#D5D7E2"),height=unit(1.5, "cm")))

# ----- Top DEGs in REACTOME metabolic pathways ------

metabo <- c() # get all names of genes involved in metabolism
for(i in 1:length(genesets)){metabo <- c(metabo,genesets[[i]])}
metabo <- unique(metabo) # get rid of duplicates

temp <- c() # get ENSSHAG and gene ID
for(i in 1:length(metabo)){temp <- rbind(temp,filter(hm3, geneID==metabo[i]))}
metabolism <- c(t(unite(temp,"ENSSHAG_geneID", ENSSHAG:geneID, sep=" ")))

# collapse transcript IDs to gene IDs

rownames(mx.scaled) <- sub('\\..*\\s', ' ', rownames(mx.scaled))
rownames(log2FC) <- sub('\\..*\\s', ' ', rownames(log2FC))
rownames(mean) <- sub('\\..*\\s', ' ', rownames(mean))
dge3$ensembl <- sub('\\..*', '', dge3$ensembl)
rownames(dge3) <- sub('\\..*\\s', ' ', rownames(dge3))

rownames(mx.scaled) <- gsub("^.{0,19}", "", rownames(mx.scaled))
rownames(log2FC) <- gsub("^.{0,19}", "", rownames(log2FC))

metabolism <- gsub("^.{0,19}", "", metabolism)

met <- subset(mx.scaled, rownames(mx.scaled) %in% metabolism)
metFC <- subset(log2FC, rownames(log2FC) %in% metabolism)
metAE <- subset(mean, rownames(mean) %in% metabolism)

# top 20 up & down
met40 <- rbind(head(met,20),tail(met,20))
met40FC <- as.matrix(c(head(metFC,20),tail(metFC,20)))
rownames(met40FC) <- rownames(met40)
colnames(met40FC) <- "logFC"
met40AE <- as.matrix(c(head(metAE,20),tail(metAE,20)))
#rownames(met40AE) <- rownames(met40)
#colnames(met40AE) <- "AveExpr"

custom_heatmap(met40, met40FC, met40AE, title = "TOP 40 - REACTOME metabolism")

# ----- All DEGs in REACTOME metabolic pathways ------

dge3_metabo_reactome <- filter(dge3, gene_name %in% metabo)

make_volcano(dge3_metabo_reactome, "Cell lines - Metabolism genes - REACTOME")

# ----- Individual TOP REACTOME metabolic pathways -----

pathways <- metabo1$set
heatmaps <- vector(mode = "list", length = length(pathways))
for(i in 1:length(pathways)){
  
  metabo <- c() # get all names of genes involved in metabolism
  for(j in 1:length(genesets)){metabo <- c(metabo,genesets[[pathways[i]]])}
  metabo <- unique(metabo) # get rid of duplicates

  temp <- c() # get ENSSHAG and gene ID
  for(k in 1:length(metabo)){temp <- rbind(temp,filter(hm3, geneID==metabo[k]))}
  metabolism <- c(t(unite(temp,"ENSSHAG_geneID", ENSSHAG:geneID, sep=" ")))
  
  metabolism <- gsub("^.{0,19}", "", metabolism)
  
  met <- subset(mx.scaled, rownames(mx.scaled) %in% metabolism)
  metFC <- subset(log2FC, rownames(log2FC) %in% metabolism)
  metAE <- subset(mean, rownames(mean) %in% metabolism)

  # react_pathw[[i]] <- custom_heatmap(met, metFC, metAE, title = paste(pathways[i], "- REACTOME"))
  heatmaps[[i]] <- custom_heatmap(met, metFC, metAE, title = paste(pathways[i]))
  
}
print(heatmaps[[1]])

print(heatmaps[[2]])

saveRDS(heatmaps[[2]], "cyst_b.rds")
print(heatmaps[[3]])

print(heatmaps[[4]])

print(heatmaps[[5]])

print(heatmaps[[6]])

print(heatmaps[[7]])

saveRDS(heatmaps[[7]], "nucl_b.rds")
print(heatmaps[[8]])

print(heatmaps[[9]])

# ----- Other interesting pathways REACTOME -----

mm <- filter(hm3, geneID %in% genesets[["Glycolysis"]])
mm <- c(t(unite(mm,"ENSSHAG_geneID", ENSSHAG:geneID, sep=" ")))

mm <- gsub("^.{0,19}", "", mm)

met <- subset(mx.scaled, rownames(mx.scaled) %in% mm)
metFC <- subset(log2FC, rownames(log2FC) %in% mm)
metAE <- subset(mean, rownames(mean) %in% mm)

glyco_p <- custom_heatmap(met, metFC, title = "Tumour biopsies")

glyco_p

saveRDS(glyco_p, "glyco_p2.rds")
# ----- Other interesting pathways REACTOME -----

mm <- filter(hm3, geneID %in% genesets[["Cholesterol biosynthesis"]])
mm <- c(t(unite(mm,"ENSSHAG_geneID", ENSSHAG:geneID, sep=" ")))

mm <- gsub("^.{0,19}", "", mm)

met <- subset(mx.scaled, rownames(mx.scaled) %in% mm)
metFC <- subset(log2FC, rownames(log2FC) %in% mm)

glyco_p <- custom_heatmap(met, metFC, title = "Cholesterol biosynthesis")

glyco_p

saveRDS(glyco_p, "chol_b.rds")
# ----- Other interesting pathways REACTOME -----

mm <- filter(hm3, geneID %in% genesets[["Sulfur amino acid metabolism"]])
mm <- c(t(unite(mm,"ENSSHAG_geneID", ENSSHAG:geneID, sep=" ")))

mm <- gsub("^.{0,19}", "", mm)

met <- subset(mx.scaled, rownames(mx.scaled) %in% mm)
metFC <- subset(log2FC, rownames(log2FC) %in% mm)

glyco_p <- custom_heatmap(met, metFC, title = "Sulfur amino acid metabolism")

glyco_p

saveRDS(glyco_p, "sulf_b.rds")
mm <- filter(hm3, geneID %in% genesets[["Formation of ATP by chemiosmotic coupling"]])
mm <- c(t(unite(mm,"ENSSHAG_geneID", ENSSHAG:geneID, sep=" ")))

#mm <- gsub("^.{0,19}", "", mm)

met <- subset(mx.scaled, rownames(mx.scaled) %in% mm)
metFC <- subset(log2FC, rownames(log2FC) %in% mm)

custom_heatmap(met, metFC, title = "Formation of ATP by chemiosmotic coupling")

mm <- filter(hm3, geneID %in% genesets[["Citric acid cycle (TCA cycle)"]])
mm <- c(t(unite(mm,"ENSSHAG_geneID", ENSSHAG:geneID, sep=" ")))

mm <- gsub("^.{0,19}", "", mm)

met <- subset(mx.scaled, rownames(mx.scaled) %in% mm)
metFC <- subset(log2FC, rownames(log2FC) %in% mm)

custom_heatmap(met, metFC, title = "Citric acid cycle (TCA cycle)")

mm <- filter(hm3, geneID %in% genesets[["Respiratory electron transport, ATP synthesis by chemiosmotic coupling, and heat production by uncoupling proteins."]])
mm <- c(t(unite(mm,"ENSSHAG_geneID", ENSSHAG:geneID, sep=" ")))

mm <- gsub("^.{0,19}", "", mm)

met <- subset(mx.scaled, rownames(mx.scaled) %in% mm)
metFC <- subset(log2FC, rownames(log2FC) %in% mm)

custom_heatmap(met, metFC, title = "Respiratory electron transport, \n ATP synthesis by chemiosmotic coupling, \n and heat production by uncoupling proteins.")

mm <- filter(hm3, geneID %in% genesets[["DNA Repair"]])
mm <- c(t(unite(mm,"ENSSHAG_geneID", ENSSHAG:geneID, sep=" ")))

mm <- gsub("^.{0,19}", "", mm)

met <- subset(mx.scaled, rownames(mx.scaled) %in% mm)
metFC <- subset(log2FC, rownames(log2FC) %in% mm)

custom_heatmap(met, metFC, title = "DNA Repair")

mm <- filter(hm3, geneID %in% genesets[["Cholesterol biosynthesis"]])
mm <- c(t(unite(mm,"ENSSHAG_geneID", ENSSHAG:geneID, sep=" ")))

mm <- gsub("^.{0,19}", "", mm)

met <- subset(mx.scaled, rownames(mx.scaled) %in% mm)
metFC <- subset(log2FC, rownames(log2FC) %in% mm)

custom_heatmap(met, metFC, title = "Cholesterol biosynthesis")

mm <- filter(hm3, geneID %in% genesets[["Detoxification of Reactive Oxygen Species"]])
mm <- c(t(unite(mm,"ENSSHAG_geneID", ENSSHAG:geneID, sep=" ")))

mm <- gsub("^.{0,19}", "", mm)

met <- subset(mx.scaled, rownames(mx.scaled) %in% mm)
metFC <- subset(log2FC, rownames(log2FC) %in% mm)
metAE <- subset(mean, rownames(mean) %in% mm)

ROS_p2 <- custom_heatmap(met, metFC, metAE, title = "Tumour biopsies")

ROS_p2 

saveRDS(ROS_p2, "ROS_p2.rds")
mm <- filter(hm3, geneID %in% genesets[["Pentose phosphate pathway"]])
mm <- c(t(unite(mm,"ENSSHAG_geneID", ENSSHAG:geneID, sep=" ")))

mm <- gsub("^.{0,19}", "", mm)

met <- subset(mx.scaled, rownames(mx.scaled) %in% mm)
metFC <- subset(log2FC, rownames(log2FC) %in% mm)

custom_heatmap(met, metFC, title = "Pentose phosphate pathway")

mm <- filter(hm3, geneID %in% genesets[["Fatty acid metabolism"]])
mm <- c(t(unite(mm,"ENSSHAG_geneID", ENSSHAG:geneID, sep=" ")))

mm <- gsub("^.{0,19}", "", mm)

met <- subset(mx.scaled, rownames(mx.scaled) %in% mm)
metFC <- subset(log2FC, rownames(log2FC) %in% mm)

custom_heatmap(met, metFC, title = "Fatty acid metabolism")

mm <- filter(hm3, geneID %in% genesets[["Glutamate and glutamine metabolism"]])
mm <- c(t(unite(mm,"ENSSHAG_geneID", ENSSHAG:geneID, sep=" ")))

mm <- gsub("^.{0,19}", "", mm)

met <- subset(mx.scaled, rownames(mx.scaled) %in% mm)
metFC <- subset(log2FC, rownames(log2FC) %in% mm)

custom_heatmap(met, metFC, title = "Glutamate and glutamine metabolism")

Adding custom gene sets

Two custom gene sets were added to the previous analysis containing cell competition & metastasis related genes based on a literature search.

cell_compet_genes <- read_excel("../ref/gsea/Cell_compet_genes.xlsx")
names(cell_compet_genes)[names(cell_compet_genes) == "Ensembl_ID"] <- "Gene.stable.ID"

cell_compet_genes <- inner_join(cell_compet_genes,hm,by="Gene.stable.ID") # making sure these genes exist in devils
## Warning in inner_join(cell_compet_genes, hm, by = "Gene.stable.ID"): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 27 of `x` matches multiple rows in `y`.
## ℹ Row 65234 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.
genesets$"Cell Competition" <- unique(cell_compet_genes$Gene.name)
genesets$"Cell Competition - Mechanical Competition" <- unique(filter(cell_compet_genes, Type == "Mechanical_Competition")$Gene.name)

metastasis_genes <- read_excel("../ref/gsea/Metastasis_genes.xlsx")
names(metastasis_genes)[names(metastasis_genes) == "Ensembl_ID"] <- "Gene.stable.ID"

metastasis_genes <- inner_join(metastasis_genes,hm,by="Gene.stable.ID") # making sure these genes exist in devils

genesets$"Metastasis" <- unique(metastasis_genes$Gene.name)
genesets$"Metastasis - promotors" <- unique(filter(metastasis_genes, Type == "Metastasis_promotor")$Gene.name)
genesets$"Metastasis - suppressors" <- unique(filter(metastasis_genes, Type == "Metastasis_supressor")$Gene.name)

res_custom <- mitch_calc(m2, genesets, priority="effect",cores=16)
## Note: Enrichments with large effect sizes may not be
##             statistically significant.
if ( !file.exists("mitch_custom_patchett.html") ) {
  mitch_report(res_custom, "mitch_custom_patchett.html")
}

cc <- filter(hm3, geneID %in% genesets[["Cell Competition"]])
cc <- c(t(unite(cc,"ENSSHAG_geneID", ENSSHAG:geneID, sep=" ")))

cc <- gsub("^.{0,19}", "", cc)

met <- subset(mx.scaled, rownames(mx.scaled) %in% cc)
metFC <- subset(log2FC, rownames(log2FC) %in% cc)

custom_heatmap(met, metFC, title = "Cell competition")

meta <- filter(hm3, geneID %in% genesets[["Metastasis"]])
meta <- c(t(unite(meta,"ENSSHAG_geneID", ENSSHAG:geneID, sep=" ")))

meta <- gsub("^.{0,19}", "", meta)

met <- subset(mx.scaled, rownames(mx.scaled) %in% meta)
metFC <- subset(log2FC, rownames(log2FC) %in% meta)

custom_heatmap(met, metFC, title = "Metastasis")

Session information

For reproducibility.

sessionInfo()
## R version 4.4.2 (2024-10-31)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.1 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.12.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.12.0
## 
## locale:
##  [1] LC_CTYPE=en_AU.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_AU.UTF-8        LC_COLLATE=en_AU.UTF-8    
##  [5] LC_MONETARY=en_AU.UTF-8    LC_MESSAGES=en_AU.UTF-8   
##  [7] LC_PAPER=en_AU.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] grid      stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] enrichplot_1.24.4           viridis_0.6.5              
##  [3] viridisLite_0.4.2           ggplot2_3.5.1              
##  [5] readxl_1.4.3                stringr_1.5.1              
##  [7] pathview_1.44.0             circlize_0.4.16            
##  [9] RColorBrewer_1.1-3          ComplexHeatmap_2.20.0      
## [11] tidyr_1.3.1                 dplyr_1.1.4                
## [13] kableExtra_1.4.0            limma_3.60.4               
## [15] mitch_1.16.0                DESeq2_1.44.0              
## [17] SummarizedExperiment_1.34.0 Biobase_2.64.0             
## [19] MatrixGenerics_1.16.0       matrixStats_1.4.1          
## [21] GenomicRanges_1.56.1        GenomeInfoDb_1.40.1        
## [23] IRanges_2.38.1              S4Vectors_0.42.1           
## [25] BiocGenerics_0.50.0         gplots_3.1.3.1             
## [27] reshape2_1.4.4             
## 
## loaded via a namespace (and not attached):
##   [1] splines_4.4.2           later_1.3.2             bitops_1.0-8           
##   [4] ggplotify_0.1.2         tibble_3.2.1            R.oo_1.26.0            
##   [7] cellranger_1.1.0        polyclip_1.10-7         graph_1.82.0           
##  [10] XML_3.99-0.17           lifecycle_1.0.4         httr2_1.0.3            
##  [13] doParallel_1.0.17       lattice_0.22-6          MASS_7.3-61            
##  [16] magrittr_2.0.3          sass_0.4.9              rmarkdown_2.28         
##  [19] jquerylib_0.1.4         yaml_2.3.10             httpuv_1.6.15          
##  [22] cowplot_1.1.3           DBI_1.2.3               abind_1.4-5            
##  [25] zlibbioc_1.50.0         purrr_1.0.2             R.utils_2.12.3         
##  [28] ggraph_2.2.1            RCurl_1.98-1.16         yulab.utils_0.1.7      
##  [31] tweenr_2.0.3            rappdirs_0.3.3          GenomeInfoDbData_1.2.12
##  [34] ggrepel_0.9.6           tidytree_0.4.6          svglite_2.1.3          
##  [37] codetools_0.2-20        DelayedArray_0.30.1     DOSE_3.30.5            
##  [40] xml2_1.3.6              ggforce_0.4.2           tidyselect_1.2.1       
##  [43] shape_1.4.6.1           aplot_0.2.3             UCSC.utils_1.0.0       
##  [46] farver_2.1.2            jsonlite_1.8.8          GetoptLong_1.0.5       
##  [49] tidygraph_1.3.1         iterators_1.0.14        systemfonts_1.1.0      
##  [52] foreach_1.5.2           tools_4.4.2             treeio_1.28.0          
##  [55] Rcpp_1.0.13             glue_1.7.0              gridExtra_2.3          
##  [58] SparseArray_1.4.8       xfun_0.49               qvalue_2.36.0          
##  [61] withr_3.0.1             fastmap_1.2.0           GGally_2.2.1           
##  [64] fansi_1.0.6             caTools_1.18.3          digest_0.6.37          
##  [67] gridGraphics_0.5-1      R6_2.5.1                mime_0.12              
##  [70] colorspace_2.1-1        Cairo_1.6-2             GO.db_3.19.1           
##  [73] gtools_3.9.5            RSQLite_2.3.7           R.methodsS3_1.8.2      
##  [76] utf8_1.2.4              generics_0.1.3          data.table_1.16.0      
##  [79] graphlayouts_1.1.1      httr_1.4.7              htmlwidgets_1.6.4      
##  [82] S4Arrays_1.4.1          scatterpie_0.2.4        ggstats_0.6.0          
##  [85] pkgconfig_2.0.3         gtable_0.3.5            blob_1.2.4             
##  [88] XVector_0.44.0          shadowtext_0.1.4        htmltools_0.5.8.1      
##  [91] fgsea_1.30.0            echarts4r_0.4.5         clue_0.3-65            
##  [94] scales_1.3.0            png_0.1-8               ggfun_0.1.6            
##  [97] knitr_1.48              rstudioapi_0.16.0       rjson_0.2.22           
## [100] nlme_3.1-166            org.Hs.eg.db_3.19.1     cachem_1.1.0           
## [103] GlobalOptions_0.1.2     KernSmooth_2.23-24      parallel_4.4.2         
## [106] AnnotationDbi_1.66.0    pillar_1.9.0            vctrs_0.6.5            
## [109] promises_1.3.0          xtable_1.8-4            cluster_2.1.6          
## [112] beeswarm_0.4.0          Rgraphviz_2.48.0        evaluate_0.24.0        
## [115] KEGGgraph_1.64.0        magick_2.8.4            cli_3.6.3              
## [118] locfit_1.5-9.10         compiler_4.4.2          rlang_1.1.4            
## [121] crayon_1.5.3            labeling_0.4.3          plyr_1.8.9             
## [124] fs_1.6.4                stringi_1.8.4           BiocParallel_1.38.0    
## [127] munsell_0.5.1           Biostrings_2.72.1       lazyeval_0.2.2         
## [130] GOSemSim_2.30.2         Matrix_1.7-0            patchwork_1.2.0        
## [133] bit64_4.0.5             KEGGREST_1.44.1         statmod_1.5.0          
## [136] shiny_1.9.1             highr_0.11              igraph_2.0.3           
## [139] memoise_2.0.1           bslib_0.8.0             ggtree_3.12.0          
## [142] fastmatch_1.1-4         bit_4.0.5               ape_5.8